*****************************************************************************************
** TPG Replication: Vote Your Region or Your Income? Decomposing Variance in Redistributive Voting 
** Stata Do-file Created by Dong Wook Lee & Melisas Rogers  
** Software Version: StataNow/SE 18.5 for Windows [64-bit x86-64]
** Table 1. Economic Effects on Redistributive Voting Behavior            
*****************************************************************************************

** Multiple datasets are combined: 
** 1) Comparative Study of Electoral Systems (CSES) -- Integrated Module 1 through 4 
** 2) Comparative Manifesto Project (CMP) 
** 3) OECD's Regional Economy Statistics

******************************************************************************
** Data Loading for Vote Choice Analysis Using a Continuous Measure (Model 1 through Model 3) **
******************************************************************************
* Start from an empty session, then move to the replication folder
* (replace the placeholder path before running).
clear all
cd "PUT_YOUR_PATH_HERE\Table 1"

* Load the master data: cases where the electoral district matches a TL3 unit
use "masterdata_electoral_district.dta", clear

*******************************
* Dependent Variable
*******************************
* rile_econ combines the per-item manifesto scores on economic topics into a
* single party position: the natural log of one sum of items over another.
* 0.5 is added to both sums (presumably so that a zero sum cannot produce
* log(0) or a division by zero - confirm with the authors).
*   Positive value: parties with less redistributive economic policies
*   Negative value: parties with more redistributive economic policies
generate rile_econ = ln(                                                            ///
    (per403 + per404 +  per406 + per412 + per413 + per504 + per506 + per701 + 0.5)  ///
    / (per401 + per402 + per407 + per414 + per505 + 0.5)                            ///
    )

* Remove observations for which the score could not be computed
drop if missing(rile_econ)

*******************************
* Key Independent Variables
*******************************
* Regional productivity quintile (per capita GDP of the electoral district,
* drawn from the OECD Regional Economy Statistics). The second tabulate also
* creates one indicator per category: rgroup_1, rgroup_2, ...
tabulate rinc_quintile_t1, missing
tabulate rinc_quintile_t1, generate(rgroup_)

* Household income quintile: codes 7, 8 and 9 are set to missing
tabulate IMD2006, missing
recode IMD2006 (7 8 9 = .), generate(IMD2006_rev)
tabulate IMD2006_rev, missing

* Drop rows where either income measure is system-missing, then check which
* countries remain
keep if !(rinc_quintile_t1 == . | IMD2006_rev == .)
tabulate country, missing

* Short names for the two key regressors (LOW: Poor, HIGH: Rich for both)
rename (rinc_quintile_t1 IMD2006_rev) (rinc_quintile hinc_quintile)

********************************
* Individual-Level Control Variables
********************************
* Note: recode copies any value that matches no rule unchanged into the new
* variable, so only the codes listed below are altered.

* Ideological self-identification. Codes 95/97/98/99 become missing, then the
* 0-10 scale is flipped so that right_left runs Right (0) - Left (10).
recode IMD3006 (95 97 98 99 = .), generate(left_right)
recode left_right (0 = 10) (1 = 9) (2 = 8) (3 = 7) (4 = 6) (5 = 5)   ///
                  (6 = 4) (7 = 3) (8 = 2) (9 = 1) (10 = 0),          ///
                  generate(right_left)

* Age (16-95); codes 9997 and 9999 become missing
recode IMD2001_1 (9997 9999 = .), generate(age)
tabulate age

* Age squared
generate age_sq = age^2

* Male (1 = Yes, 0 = No); code 9 becomes missing
recode IMD2002 (1 = 1) (2 = 0) (9 = .), generate(male)
tabulate male

* Higher education (1 = Yes, 0 = No): codes 0-2 -> 0, codes 3-4 -> 1,
* codes 6-9 -> missing
recode IMD2003 (0 1 2 = 0) (3 4 = 1) (6 7 8 9 = .), generate(educationlevel)
tabulate educationlevel

* Married (1 = Yes, 0 = No): codes 1 and 3 -> 1, codes 2 and 4 -> 0,
* codes 5, 7 and 9 -> missing
recode IMD2004 (1 3 = 1) (2 4 = 0) (5 7 9 = .), generate(married)
tabulate married

* Religiosity (1 = Yes, 0 = No): codes 12, 13, 96 and 97 -> 0,
* codes 98 and 99 -> missing
recode IMD2005 (1 2 3 4 5 7 8 9 11 = 1) (12 13 96 97 = 0) (98 99 = .), generate(religiosity)
tabulate religiosity

***********************************
* Group-Level Control Variables
***********************************
* Voter turnout (%) and the country-specific election year
rename (IMD5006_1 IMD1008_YEAR) (voteturnout electionyear)

* Numeric country id
egen countryid = group(country)

* Country indicators. tabulate creates countryid_1 ... countryid_12; the
* cross-tab shows which country each id stands for, and the rename below
* relies on ids 1-12 appearing in exactly this order.
tabulate countryid, generate(countryid_)
tabulate countryid country
rename (countryid_1 countryid_2 countryid_3 countryid_4  countryid_5  countryid_6    ///
        countryid_7 countryid_8 countryid_9 countryid_10 countryid_11 countryid_12)  ///
       (austria     czech_republic denmark  finland      germany      greece         ///
        italy       latvia         norway   poland       spain        switzerland)

* Denmark, Latvia and Norway are excluded due to missing observations
drop if inlist(1, denmark, latvia, norway)

***************************************
** Model 1 (Continuous Measure) Regression 
***************************************
* Heteroskedastic regression fitted by alternating between two equations:
*   - mean equation (BETA): rile_econ on household and regional income
*     quintiles, individual controls, country dummies and election-year dummies;
*   - variance equation (LAMBDA): squared residuals on the two income quintiles
*     and voter turnout, as a gamma GLM with log link.
* The fitted variances S2 are fed back as weights 1/S2 and the two steps are
* repeated until the summed log likelihood improves by no more than 0.00001.

/*Two-stage estimate of β (OLS estimates)*/
reg rile_econ hinc_quintile rinc_quintile right_left age age_sq male educationlevel married religiosity czech_republic finland germany greece italy poland spain switzerland i.electionyear
* Flag outliers: standardized residual larger than 1.5 in absolute value.
* Stata treats missing as larger than any number, so rows with a missing
* stresid (not in the estimation sample) are also set to 1 by the replace.
predict k if e(sample), xb
gen resid = rile_econ-k
egen stresid=std(resid)
gen outlier = 0 if e(sample)
replace outlier = 1 if abs(stresid)>1.5
* Re-fit without the flagged rows
reg rile_econ hinc_quintile rinc_quintile right_left age age_sq male educationlevel married religiosity czech_republic finland germany greece italy poland spain switzerland i.electionyear if outlier!=1
* predict has no if-qualifier here, so residuals are computed for every row
* with non-missing regressors, including rows flagged as outliers.
predict R, r                                                                                            /*OLS residuals*/
gen R2=R^2                                                                                              /*squared residuals for glm fit*/
glm R2 rinc_quintile hinc_quintile voteturnout, family(gamma) link(log)                                 /*gamma regression of R2, log link*/
predict S2, mu                                                                                          /*fitted variances, exp(xb)*/
gen LOGLIK = -.5*(ln(S2)+(R2/S2))                                                                       /*normal log likelihood per row, up to a constant*/
egen LL0 = sum(LOGLIK)                                                                                  /*summing log likelihood (same value on every row)*/
display LL0

/*Updating beta and lambda coefficients*/
* DLL, LL0 and LLN are constant variables; while and display read their value
* from the first observation. DLL starts at 1 so the loop body runs at least once.
gen DLL=1                                                                                              /*initialize change in loglik*/
while DLL> 0.00001{                                    
* Remove last iteration's working variables so they can be re-created
drop R k resid stresid outlier                                                                                        
quietly: reg rile_econ hinc_quintile rinc_quintile right_left age age_sq male educationlevel married religiosity czech_republic finland germany greece italy poland spain switzerland i.electionyear [aw=1/S2] /*WLS with inverse fitted variances as weights*/

* Re-flag outliers from the weighted fit (same 1.5 rule as above)
predict k if e(sample), xb
gen resid = rile_econ-k
egen stresid=std(resid)
gen outlier = 0 if e(sample)
replace outlier = 1 if abs(stresid)>1.5

quietly: reg rile_econ hinc_quintile rinc_quintile right_left age age_sq male educationlevel married religiosity czech_republic finland germany greece italy poland spain switzerland i.electionyear [aw=1/S2] if outlier!=1 /*WLS with inverse fitted variances as weights*/
* S2 has served as the weight; drop it so it can be re-predicted below
drop S2
predict R, r                                                                                           /*WLS residuals*/
replace R2 = R^2                                                                                       /*squared residuals for glm fit*/
est store BETA                                                                                         /*saving the mean-equation coefficients (incl. hinc_quintile)*/
quietly: glm R2 hinc_quintile rinc_quintile voteturnout , family(gamma) link(log)                      /*gamma regression of R2, log link*/
predict S2, mu                                                                                         /*fitted variances, exp(Xb)*/
est store LAMBDA                                                                                       /*saving the variance-equation coefficients (incl. rinc_quintile)*/
replace LOGLIK = -.5*(ln(S2)+(R2/S2))                                                                  /*evaluating log likelihood*/
egen LLN = sum(LOGLIK)                                                                                 /*summing log likelihood*/
di LLN
replace DLL=LLN-LL0                                                                                    /*assess convergence: change in summed log likelihood*/
replace LL0=LLN      
drop LLN
}

* Report the last stored mean (BETA) and variance (LAMBDA) equations:
* first with significance stars, then with standard errors.
est table BETA LAMBDA, b star(.1 .05 .01) stats(N ll aic bic)                                         /*coefficients with significance stars*/
est table BETA LAMBDA, b se stats(N ll aic bic) b(%7.3f) se(%7.3f)
* Clean up working variables (est store created _est_BETA and _est_LAMBDA)
* so the next model can re-create them.
drop R2 LOGLIK LL0 DLL k resid stresid outlier R _est_BETA S2 _est_LAMBDA

****************************************************************
** Model 2 (Continuous Measure) + 5 Regional Income Group Dummies  
****************************************************************
* Same iterative mean/variance estimation as Model 1, except that the regional
* income quintile enters both equations as factor-variable dummies
* (i.rinc_quintile) instead of a single linear term.

/*Two-stage estimate of β (OLS estimates)*/
reg rile_econ hinc_quintile i.rinc_quintile right_left age age_sq male educationlevel married religiosity czech_republic finland germany greece italy poland spain switzerland i.electionyear   
* Flag outliers: standardized residual larger than 1.5 in absolute value.
* Stata treats missing as larger than any number, so rows with a missing
* stresid are also set to 1 by the replace.
predict k if e(sample), xb
gen resid = rile_econ-k
egen stresid=std(resid)
gen outlier = 0 if e(sample)
replace outlier = 1 if abs(stresid)>1.5
* Re-fit without the flagged rows
reg rile_econ hinc_quintile i.rinc_quintile right_left age age_sq male educationlevel married religiosity czech_republic finland germany greece italy poland spain switzerland i.electionyear  if outlier!=1
predict R, r                                                                                            /*OLS residuals*/
gen R2=R^2                                                                                              /*squared residuals for glm fit*/
glm R2 i.rinc_quintile hinc_quintile voteturnout, family(gamma) link(log)                               /*gamma regression of R2, log link*/
predict S2, mu                                                                                          /*fitted variances, exp(xb)*/
gen LOGLIK = -.5*(ln(S2)+(R2/S2))                                                                       /*normal log likelihood per row, up to a constant*/
egen LL0 = sum(LOGLIK)                                                                                  /*summing log likelihood (same value on every row)*/
display LL0

/*Updating beta and lambda coefficients*/
* DLL, LL0 and LLN are constant variables; while and display read their value
* from the first observation. DLL starts at 1 so the loop body runs at least once.
gen DLL=1                                                                                              /*initialize change in loglik*/
while DLL> 0.00001{                                    
* Remove last iteration's working variables so they can be re-created
drop R k resid stresid outlier                                                                                        
quietly: reg rile_econ hinc_quintile i.rinc_quintile right_left age age_sq male educationlevel married religiosity czech_republic finland germany greece italy poland spain switzerland i.electionyear  [aw=1/S2] /*WLS with inverse fitted variances as weights*/
* Re-flag outliers from the weighted fit (same 1.5 rule as above)
predict k if e(sample), xb
gen resid = rile_econ-k
egen stresid=std(resid)
gen outlier = 0 if e(sample)
replace outlier = 1 if abs(stresid)>1.5
quietly: reg rile_econ hinc_quintile i.rinc_quintile right_left age age_sq male educationlevel married religiosity czech_republic finland germany greece italy poland spain switzerland i.electionyear [aw=1/S2] if outlier!=1 /*WLS with inverse fitted variances as weights*/
* S2 has served as the weight; drop it so it can be re-predicted below
drop S2
predict R, r                                                                                           /*WLS residuals*/
replace R2 = R^2                                                                                       /*squared residuals for glm fit*/
est store BETA                                                                                         /*saving the mean-equation coefficients (incl. hinc_quintile)*/
quietly: glm R2 hinc_quintile i.rinc_quintile voteturnout, family(gamma) link(log)                     /*gamma regression of R2, log link*/
predict S2, mu                                                                                         /*fitted variances, exp(Xb)*/
est store LAMBDA                                                                                       /*saving the variance-equation coefficients (incl. rinc_quintile dummies)*/
replace LOGLIK = -.5*(ln(S2)+(R2/S2))                                                                  /*evaluating log likelihood*/
egen LLN = sum(LOGLIK)                                                                                 /*summing log likelihood*/
di LLN
replace DLL=LLN-LL0                                                                                    /*assess convergence: change in summed log likelihood*/
replace LL0=LLN      
drop LLN
}

* Report the last stored mean (BETA) and variance (LAMBDA) equations:
* first with significance stars, then with standard errors.
est table BETA LAMBDA, b star(.1 .05 .01) stats(N ll aic bic)                                           /*coefficients with significance stars*/
est table BETA LAMBDA, b se stats(N ll aic bic) b(%7.3f) se(%7.3f)
* Clean up working variables (est store created _est_BETA and _est_LAMBDA)
* so the next model can re-create them.
drop R2 LOGLIK LL0 DLL k resid stresid outlier R _est_BETA S2 _est_LAMBDA

*******************************************************************************
** Model 3 (Continuous Measure)  + Richest and Poorest Income Group Dummies Only       
*******************************************************************************
* Same iterative mean/variance estimation as Model 1, restricted to the first
* and fifth regional income groups, with rgroup_5 as the regional regressor.

*Selected Samples 
* keep permanently removes all other rows from memory, so any model that needs
* the full sample must run before this line (or after re-loading the data).
keep if rgroup_1==1 | rgroup_5==1

/*Two-stage estimate of β (OLS estimates)*/
reg rile_econ hinc_quintile rgroup_5 right_left age age_sq male educationlevel married religiosity czech_republic finland germany greece italy poland spain switzerland i.electionyear 
* Flag outliers: standardized residual larger than 1.5 in absolute value.
* Stata treats missing as larger than any number, so rows with a missing
* stresid are also set to 1 by the replace.
predict k if e(sample), xb
gen resid = rile_econ-k
egen stresid=std(resid)
gen outlier = 0 if e(sample)
replace outlier = 1 if abs(stresid)>1.5
* Re-fit without the flagged rows
reg rile_econ hinc_quintile rgroup_5 right_left age age_sq male educationlevel married religiosity czech_republic finland germany greece italy poland spain switzerland i.electionyear  if outlier!=1
predict R, r                                                                                            /*OLS residuals*/
gen R2=R^2                                                                                              /*squared residuals for glm fit*/
glm R2 hinc_quintile rgroup_5 voteturnout, family(gamma) link(log)                                      /*gamma regression of R2, log link*/
predict S2, mu                                                                                          /*fitted variances, exp(xb)*/
gen LOGLIK = -.5*(ln(S2)+(R2/S2))                                                                       /*normal log likelihood per row, up to a constant*/
egen LL0 = sum(LOGLIK)                                                                                  /*summing log likelihood (same value on every row)*/
display LL0

/*Updating beta and lambda coefficients*/
* DLL, LL0 and LLN are constant variables; while and display read their value
* from the first observation. DLL starts at 1 so the loop body runs at least once.
gen DLL=1                                                                                              /*initialize change in loglik*/
while DLL> 0.00001{                                    
* Remove last iteration's working variables so they can be re-created
drop R k resid stresid outlier                                                                                        
quietly: reg rile_econ hinc_quintile rgroup_5 right_left age age_sq male educationlevel married religiosity czech_republic finland germany greece italy poland spain switzerland i.electionyear  [aw=1/S2] /*WLS with inverse fitted variances as weights*/
* Re-flag outliers from the weighted fit (same 1.5 rule as above)
predict k if e(sample), xb
gen resid = rile_econ-k
egen stresid=std(resid)
gen outlier = 0 if e(sample)
replace outlier = 1 if abs(stresid)>1.5
quietly: reg rile_econ hinc_quintile rgroup_5 right_left age age_sq male educationlevel married religiosity czech_republic finland germany greece italy poland spain switzerland i.electionyear [aw=1/S2] if outlier!=1 /*WLS with inverse fitted variances as weights*/
* S2 has served as the weight; drop it so it can be re-predicted below
drop S2
predict R, r                                                                                           /*WLS residuals*/
replace R2 = R^2                                                                                       /*squared residuals for glm fit*/
est store BETA                                                                                         /*saving the mean-equation coefficients (incl. hinc_quintile)*/
quietly: glm R2 hinc_quintile rgroup_5 voteturnout, family(gamma) link(log)                            /*gamma regression of R2, log link*/
predict S2, mu                                                                                         /*fitted variances, exp(Xb)*/
est store LAMBDA                                                                                       /*saving the variance-equation coefficients (incl. rgroup_5)*/
replace LOGLIK = -.5*(ln(S2)+(R2/S2))                                                                  /*evaluating log likelihood*/
egen LLN = sum(LOGLIK)                                                                                 /*summing log likelihood*/
di LLN
replace DLL=LLN-LL0                                                                                    /*assess convergence: change in summed log likelihood*/
replace LL0=LLN      
drop LLN
}

* Report the last stored mean (BETA) and variance (LAMBDA) equations:
* first with significance stars, then with standard errors.
est table BETA LAMBDA, b star(.1 .05 .01) stats(N ll aic bic)                                           /*coefficients with significance stars*/
est table BETA LAMBDA, b se stats(N ll aic bic) b(%7.3f) se(%7.3f)
* Clean up working variables (est store created _est_BETA and _est_LAMBDA)
drop R2 LOGLIK LL0 DLL k resid stresid outlier R _est_BETA S2 _est_LAMBDA

********************************************************************************
** Data Re-loading for Vote Choice Analysis Using a Binary Measure (Model 4 through Model 6)
********************************************************************************
* Start again from an empty session, then move to the replication folder
* (replace the placeholder path before running).
clear all
cd "PUT_YOUR_PATH_HERE\Table 1"

* Load the master data: cases where the electoral district matches a TL3 unit
use "masterdata_electoral_district.dta", clear

***************************************************************************************************************************************
* Dependent Variable
* (It already accounts for country-specific election year information, so
* there is no need to control for those fixed effects separately in the model.)
***************************************************************************************************************************************
* rile_binary_econ as delivered - higher value: Right, lower value: Left.
* Rows where it is system-missing are removed.
keep if rile_binary_econ != .

* Reversed copy used as the outcome (0 and 1 swapped) -
* higher value: More Redistributive, lower value: Less Redistributive.
recode rile_binary_econ (0 = 1) (1 = 0), generate(rile_binary_econ_rev)
tabulate rile_binary_econ_rev
tabulate rile_binary_econ

*****************************
** Key Independent Variables
*****************************
* Regional productivity quintile (per capita GDP of the electoral district,
* drawn from the OECD Regional Economy Statistics). The second tabulate also
* creates one indicator per category: rgroup_1, rgroup_2, ...
tabulate rinc_quintile_t1, missing
tabulate rinc_quintile_t1, generate(rgroup_)

* Household income quintile: codes 7, 8 and 9 are set to missing
tabulate IMD2006, missing
recode IMD2006 (7 8 9 = .), generate(IMD2006_rev)
tabulate IMD2006_rev, missing

* Drop rows where either income measure is system-missing, then check which
* countries remain
keep if !(rinc_quintile_t1 == . | IMD2006_rev == .)
tabulate country, missing

* Short names for the two key regressors (LOW: Poor, HIGH: Rich for both)
rename (rinc_quintile_t1 IMD2006_rev) (rinc_quintile hinc_quintile)

********************************
* Individual-Level Control Variables
********************************
* Note: recode copies any value that matches no rule unchanged into the new
* variable, so only the codes listed below are altered.

* Ideological self-identification. Codes 95/97/98/99 become missing, then the
* 0-10 scale is flipped so that right_left runs Right (0) - Left (10).
recode IMD3006 (95 97 98 99 = .), generate(left_right)
recode left_right (0 = 10) (1 = 9) (2 = 8) (3 = 7) (4 = 6) (5 = 5)   ///
                  (6 = 4) (7 = 3) (8 = 2) (9 = 1) (10 = 0),          ///
                  generate(right_left)

* Age (16-95); codes 9997 and 9999 become missing
recode IMD2001_1 (9997 9999 = .), generate(age)
tabulate age

* Age squared
generate age_sq = age^2

* Male (1 = Yes, 0 = No); code 9 becomes missing
recode IMD2002 (1 = 1) (2 = 0) (9 = .), generate(male)
tabulate male

* Higher education (1 = Yes, 0 = No): codes 0-2 -> 0, codes 3-4 -> 1,
* codes 6-9 -> missing
recode IMD2003 (0 1 2 = 0) (3 4 = 1) (6 7 8 9 = .), generate(educationlevel)
tabulate educationlevel

* Married (1 = Yes, 0 = No): codes 1 and 3 -> 1, codes 2 and 4 -> 0,
* codes 5, 7 and 9 -> missing
recode IMD2004 (1 3 = 1) (2 4 = 0) (5 7 9 = .), generate(married)
tabulate married

* Religiosity (1 = Yes, 0 = No): codes 12, 13, 96 and 97 -> 0,
* codes 98 and 99 -> missing
recode IMD2005 (1 2 3 4 5 7 8 9 11 = 1) (12 13 96 97 = 0) (98 99 = .), generate(religiosity)
tabulate religiosity

***********************************
* Group-Level Control Variables
***********************************
* Voter turnout (%)
rename IMD5006_1 voteturnout

* Numeric country id
egen countryid = group(country)

* Country indicators. tabulate creates countryid_1 ... countryid_12; the
* cross-tab shows which country each id stands for, and the rename below
* relies on ids 1-12 appearing in exactly this order.
tabulate countryid, generate(countryid_)
tabulate countryid country
rename (countryid_1 countryid_2 countryid_3 countryid_4  countryid_5  countryid_6    ///
        countryid_7 countryid_8 countryid_9 countryid_10 countryid_11 countryid_12)  ///
       (austria     czech_republic denmark  finland      germany      greece         ///
        italy       latvia         norway   poland       spain        switzerland)

* Denmark, Latvia and Norway are excluded due to missing observations
drop if inlist(1, denmark, latvia, norway)

***************************************
** Model 4 (Binary Measure) Regression             ** 
***************************************
* Binary-outcome counterpart of the iterative mean/variance estimation:
*   - mean equation (BETA): logit of rile_binary_econ_rev on household and
*     regional income quintiles plus individual controls;
*   - variance equation (LAMBDA): squared Pearson residuals on the two income
*     quintiles and voter turnout, as a gamma GLM with log link.
* The fitted values S2 are fed back as weights 1/S2 and the two steps are
* repeated until the summed log likelihood improves by no more than 0.00001.

/*Two-stage estimate of β (logistic estimates)*/
glm rile_binary_econ_rev hinc_quintile rinc_quintile right_left age age_sq male educationlevel married religiosity, family(binomial) link(logit)
predict R, pearson                                                                                      /*Pearson residuals*/  
gen R2=R^2                                                                                              /*squared residuals for glm fit*/
glm R2 hinc_quintile rinc_quintile voteturnout, family(gamma) link(log)                                 /*gamma regression of R2, log link*/
predict S2, mu                                                                                          /*fitted variances, exp(xb)*/
gen LOGLIK = -.5*(ln(S2)+(R2/S2))                                                                       /*log likelihood term per row*/
egen LL0 = sum(LOGLIK)                                                                                  /*summing log likelihood (same value on every row)*/
display LL0

/*Updating beta and lambda coefficients*/
* DLL, LL0 and LLN are constant variables; while and display read their value
* from the first observation. DLL starts at 1 so the loop body runs at least once.
gen DLL=1                                                                                              /*initialize change in loglik*/
while DLL> 0.00001{                                    
* Remove last iteration's residuals so they can be re-predicted
drop R                                             
quietly: glm rile_binary_econ_rev hinc_quintile rinc_quintile right_left age age_sq male educationlevel married religiosity [aw=1/S2], family(binomial) link(logit)  /*logit re-fit with inverse fitted variances as weights*/
* S2 has served as the weight; drop it so it can be re-predicted below
drop S2
predict R, pearson                                                                                     /*Pearson residuals from the weighted fit*/
replace R2 = R^2                                                                                       /*squared residuals for glm fit*/
est store BETA                                                                                         /*saving the mean-equation coefficients (incl. hinc_quintile)*/
quietly: glm R2 hinc_quintile rinc_quintile voteturnout, family(gamma) link(log)                       /*gamma regression of R2, log link*/
predict S2, mu                                                                                         /*fitted variances, exp(Xb)*/
est store LAMBDA                                                                                       /*saving the variance-equation coefficients (incl. rinc_quintile)*/
replace LOGLIK = -.5*(ln(S2)+(R2/S2))                                                                  /*evaluating log likelihood*/
egen LLN = sum(LOGLIK)                                                                                 /*summing log likelihood*/
di LLN
replace DLL=LLN-LL0                                                                                    /*assess convergence: change in summed log likelihood*/
replace LL0=LLN      
drop LLN
}

* Report the last stored mean (BETA) and variance (LAMBDA) equations:
* first with significance stars, then with standard errors.
est table BETA LAMBDA, b star(.1 .05 .01) stats(N ll aic bic)                                          /*coefficients with significance stars*/
est table BETA LAMBDA, b se stats(N ll aic bic)  b(%7.3f) se(%7.3f)
* Clean up working variables (est store created _est_BETA and _est_LAMBDA)
* so the next model can re-create them.
drop R2 LOGLIK LL0 DLL R _est_BETA S2 _est_LAMBDA

**************************************************************
** Model 5 (Binary Measure) + 5 Regional Income Group Dummies                   **
**************************************************************
* Same iterative logit/variance estimation as Model 4, except that the
* regional income quintile enters both equations as factor-variable dummies
* (i.rinc_quintile) instead of a single linear term.

/*Two-stage estimate of β (logistic estimates)*/
glm rile_binary_econ_rev hinc_quintile i.rinc_quintile right_left age age_sq male educationlevel married religiosity, family(binomial) link(logit)
predict R, pearson                                                   /*Pearson residuals*/  
gen R2=R^2                                                                                              /*squared residuals for glm fit*/
glm R2 hinc_quintile i.rinc_quintile voteturnout, family(gamma) link(log)                               /*gamma regression of R2, log link*/
predict S2, mu                                                                                          /*fitted variances, exp(xb)*/
gen LOGLIK = -.5*(ln(S2)+(R2/S2))                                                                       /*log likelihood term per row*/
egen LL0 = sum(LOGLIK)                                                                                  /*summing log likelihood (same value on every row)*/
display LL0

/*Updating beta and lambda coefficients*/
* DLL, LL0 and LLN are constant variables; while and display read their value
* from the first observation. DLL starts at 1 so the loop body runs at least once.
gen DLL=1                                                                                              /*initialize change in loglik*/
while DLL> 0.00001{                                    
* Remove last iteration's residuals so they can be re-predicted
drop R                                             
quietly: glm rile_binary_econ_rev hinc_quintile i.rinc_quintile right_left age age_sq male educationlevel married religiosity [aw=1/S2], family(binomial) link(logit)  /*logit re-fit with inverse fitted variances as weights*/
* S2 has served as the weight; drop it so it can be re-predicted below
drop S2
predict R, pearson                                                                                     /*Pearson residuals from the weighted fit*/
replace R2 = R^2                                                                                       /*squared residuals for glm fit*/
est store BETA                                                                                         /*saving the mean-equation coefficients (incl. hinc_quintile)*/
quietly: glm R2 hinc_quintile i.rinc_quintile voteturnout, family(gamma) link(log)                     /*gamma regression of R2, log link*/
predict S2, mu                                                                                         /*fitted variances, exp(Xb)*/
est store LAMBDA                                                                                       /*saving the variance-equation coefficients (incl. rinc_quintile dummies)*/
replace LOGLIK = -.5*(ln(S2)+(R2/S2))                                                                  /*evaluating log likelihood*/
egen LLN = sum(LOGLIK)                                                                                 /*summing log likelihood*/
di LLN                                                                                                 
replace DLL=LLN-LL0                                                                                    /*assess convergence: change in summed log likelihood*/
replace LL0=LLN      
drop LLN
}

* Report the last stored mean (BETA) and variance (LAMBDA) equations:
* first with significance stars, then with standard errors.
est table BETA LAMBDA, b star(.1 .05 .01) stats(N ll aic bic)                                          /*coefficients with significance stars*/
est table BETA LAMBDA, b se stats(N ll aic bic)  b(%7.3f) se(%7.3f)
* Clean up working variables (est store created _est_BETA and _est_LAMBDA)
* so the next model can re-create them.
drop R2 LOGLIK LL0 DLL R _est_BETA S2 _est_LAMBDA

*******************************************************************
** Model 6 (Binary Measure) + Richest and Poorest Income Group Dummies Only       **
*******************************************************************
* Same iterative logit/variance estimation as Models 4 and 5, restricted to
* the first and fifth regional income groups, with rgroup_5 as the regional
* regressor:
*   - mean equation (BETA): logit of rile_binary_econ_rev on household income
*     quintile, rgroup_5 and individual controls;
*   - variance equation (LAMBDA): squared Pearson residuals on household income
*     quintile, rgroup_5 and voter turnout, as a gamma GLM with log link.
* The fitted values S2 are fed back as weights 1/S2 and the two steps are
* repeated until the summed log likelihood improves by no more than 0.00001.

* keep permanently removes all other rows from memory, so this model must run
* after Models 4 and 5 (or the data must be re-loaded before them).
keep if rgroup_1==1 | rgroup_5==1

/*Two-stage estimate of β (logistic estimates)*/
glm rile_binary_econ_rev hinc_quintile rgroup_5 right_left age age_sq male educationlevel married religiosity, family(binomial) link(logit) 
predict R, pearson                                                                                      /*Pearson residuals*/
gen R2=R^2                                                                                              /*squared residuals for glm fit*/
* The starting variance equation uses the log link, like the in-loop refit
* below and like Models 4 and 5, so that S2 = exp(xb) and the starting log
* likelihood LL0 is comparable with the values computed inside the loop.
glm R2 hinc_quintile rgroup_5 voteturnout, family(gamma) link(log)                                      /*gamma regression of R2, log link*/
predict S2, mu                                                                                          /*fitted variances, exp(xb)*/
gen LOGLIK = -.5*(ln(S2)+(R2/S2))                                                                       /*log likelihood term per row*/
egen LL0 = sum(LOGLIK)                                                                                  /*summing log likelihood (same value on every row)*/
display LL0

/*Updating beta and lambda coefficients*/
* DLL, LL0 and LLN are constant variables; while and display read their value
* from the first observation. DLL starts at 1 so the loop body runs at least once.
gen DLL=1                                                                                              /*initialize change in loglik*/
while DLL> 0.00001{                                    
* Remove last iteration's residuals so they can be re-predicted
drop R                                             
quietly: glm rile_binary_econ_rev hinc_quintile rgroup_5 right_left age age_sq male educationlevel married religiosity [aw=1/S2], family(binomial) link(logit) /*logit re-fit with inverse fitted variances as weights*/
* S2 has served as the weight; drop it so it can be re-predicted below
drop S2
predict R, pearson                                                                                     /*Pearson residuals from the weighted fit*/
replace R2 = R^2                                                                                       /*squared residuals for glm fit*/
est store BETA                                                                                         /*saving the mean-equation coefficients (incl. hinc_quintile)*/
quietly: glm R2 hinc_quintile rgroup_5 voteturnout, family(gamma) link(log)                            /*gamma regression of R2, log link*/
predict S2, mu                                                                                         /*fitted variances, exp(Xb)*/
est store LAMBDA                                                                                       /*saving the variance-equation coefficients (incl. rgroup_5)*/
replace LOGLIK = -.5*(ln(S2)+(R2/S2))                                                                  /*evaluating log likelihood*/
egen LLN = sum(LOGLIK)                                                                                 /*summing log likelihood*/
di LLN
replace DLL=LLN-LL0                                                                                    /*assess convergence: change in summed log likelihood*/
replace LL0=LLN      
drop LLN
}

* Report the last stored mean (BETA) and variance (LAMBDA) equations:
* first with significance stars, then with standard errors.
est table BETA LAMBDA, b star(.1 .05 .01) stats(N ll aic bic)                                          /*coefficients with significance stars*/
est table BETA LAMBDA, b se stats(N ll aic bic)  b(%7.3f) se(%7.3f)
* Clean up working variables (est store created _est_BETA and _est_LAMBDA)
drop R2 LOGLIK LL0 DLL R _est_BETA S2 _est_LAMBDA
